here::set_here()
## File .here already exists in /Users/koenvandenberge/Dropbox/research/brainStat/hbcRegenGithub/scripts
suppressPackageStartupMessages({
library(slingshot)
library(SingleCellExperiment)
library(cowplot)
library(rgl)
library(clusterExperiment)
library(RColorBrewer)
library(ggplot2)
library(pheatmap)
library(wesanderson)
library(gridExtra)
library(irlba)
library(tidyverse)
})
sds <- readRDS("../data/finalTrajectory/sling.rds")
counts <- readRDS("../data/finalTrajectory/counts_noResp_noMV.rds")
# counts <- round(counts)
sce <- readRDS("../data/finalTrajectory/sce_tradeSeq20200904.rds")
load("../data/ALL_TF.Rda")
clDatta <- readRDS("../data/finalTrajectory/dattaCl_noResp_noMV.rds")
timePoint <- readRDS("../data/finalTrajectory/timePoint_noResp_noMV.rds")
I will select
cols <- c("#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FF7F00", "#FFFF33", "#A65628", "#F781BF", "#999999")
view3d( theta = 10, phi = 10)
# Datta labels
plot3d(reducedDim(sds), aspect = 'iso', col=cols[clDatta], alpha=.6, xlab="UMAP1", ylab="UMAP2", zlab="UMAP3", box=FALSE, top=TRUE)
plot3d(sds, add=TRUE, lwd=3, col="black")
# timepoints
plot3d(reducedDim(sds), aspect = 'iso', col=cols[timePoint], alpha=.6, xlab="UMAP1", ylab="UMAP2", zlab="UMAP3", box=FALSE, top=TRUE)
plot3d(sds, add=TRUE, lwd=3, col="black")
# cluster labels used for TI
clSds <- apply(slingClusterLabels(sds), 1, which.max)
plot3d(reducedDim(sds), aspect = 'iso', col=cols[clSds], alpha=.6, xlab="UMAP1", ylab="UMAP2", zlab="UMAP3", box=FALSE, top=TRUE)
plot3d(sds, add=TRUE, lwd=3, col="black")
actHBCId <- which(clSds == 8 & timePoint == "24HPI")
restHBCId <- which(clDatta == "HBC" & timePoint == "14DPI")
grpHBC <- rep(NA, length(c(actHBCId, restHBCId)))
grpHBC[1:length(actHBCId)] <- "activated"
grpHBC[is.na(grpHBC)] <- "resting"
grpHBC <- factor(grpHBC)
countHBC <- counts[, c(actHBCId, restHBCId)]
library(scran)
library(uwot)
library(scater)
##
## Attaching package: 'scater'
## The following object is masked from 'package:clusterExperiment':
##
## plotHeatmap
logFracs <- log(sweep(countHBC+1e-5, 2, colSums(countHBC), "/"))
gv <- modelGeneVar(countHBC)
hvg <- getTopHVGs(gv, n=1000)
pc10 <- irlba::irlba(logFracs[hvg,], nv=10)
um10 <- uwot::umap(pc10$v %*% diag(pc10$d))
plot(um10, col=alpha(c("orange", "darkseagreen3")[as.numeric(grpHBC)], .75), pch=16, cex=1/2, bty='l')
legend("topright", c("activated", "resting"), pch=16, col=c("orange", "darkseagreen3"), bty="n")
library(harmony)
## Loading required package: Rcpp
harmony_embeddings <- HarmonyMatrix(logFracs, meta_data=grpHBC, do_pca=TRUE)
## Harmony 1/10
## Harmony 2/10
## Harmony converged after 2 iterations
umHarmony <- uwot::umap(harmony_embeddings)
plot(umHarmony, col=alpha(c("orange", "darkseagreen3")[as.numeric(grpHBC)], .75), pch=16, cex=1/2, bty='l')
legend("topright", c("activated", "resting"), pch=16, col=c("orange", "darkseagreen3"), bty="n")
library(Signac)
library(Seurat)
##
## Attaching package: 'Seurat'
## The following object is masked from 'package:SummarizedExperiment':
##
## Assays
library(patchwork)
##
## Attaching package: 'patchwork'
## The following object is masked from 'package:cowplot':
##
## align_plots
library(ArchR)
## Loading required package: data.table
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following object is masked from 'package:purrr':
##
## transpose
## The following object is masked from 'package:SummarizedExperiment':
##
## shift
## The following object is masked from 'package:GenomicRanges':
##
## shift
## The following object is masked from 'package:IRanges':
##
## shift
## The following objects are masked from 'package:S4Vectors':
##
## first, second
## Loading required package: rhdf5
## Loading required package: magrittr
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
set.seed(1234)
addArchRGenome("mm10")
## Setting default genome to Mm10.
# scATAC-seq importing and preprocessing
# from 20200610_scATAC_injury_archr_samples_pairwiseHBC_v3.Rmd
archProj <- loadArchRProject("../data/scATAC/archR_samples_pairwise_v2")
## Successfully loaded ArchRProject!
##
## / |
## / \
## . / |.
## \\\ / |.
## \\\ / `|.
## \\\ / |.
## \ / |\
## \\#####\ / ||
## ==###########> / ||
## \\##==......\ / ||
## ______ = =|__ /__ || \\\
## ,--' ,----`-,__ ___/' --,-`-===================##========>
## \ ' ##_______ _____ ,--,__,=##,__ ///
## , __== ___,-,__,--'#' ===' `-' | ##,-/
## -,____,---' \\####\\________________,--\\_##,/
## ___ .______ ______ __ __ .______
## / \ | _ \ / || | | | | _ \
## / ^ \ | |_) | | ,----'| |__| | | |_) |
## / /_\ \ | / | | | __ | | /
## / _____ \ | |\ \\___ | `----.| | | | | |\ \\___.
## /__/ \__\ | _| `._____| \______||__| |__| | _| `._____|
##
clHBCMerged <- archProj$clHBCMerged
plotEmbedding(ArchRProj = archProj, colorBy = "cellColData",
name = "clHBCMerged", embedding = "UMAP_cor")
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-ab2719972642-Date-2021-12-23_Time-18-03-51.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = cellColData
## Plotting Embedding
## 1
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-ab2719972642-Date-2021-12-23_Time-18-03-51.log
archrUmap <- ArchR::getEmbedding(archProj, embedding = "UMAP_cor")
colnames(archrUmap) <- c("UMAP1", "UMAP2")
plotEmbedding(ArchRProj = archProj, colorBy = "cellColData",
name = "treat", embedding = "UMAP_cor")
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-ab271250c42e-Date-2021-12-23_Time-18-03-54.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = cellColData
## Plotting Embedding
## 1
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-ab271250c42e-Date-2021-12-23_Time-18-03-54.log
clHBCMerged2 <- as.character(getCellColData(archProj, "clHBCMerged")$clHBCMerged)
clHBCMerged2[clHBCMerged2 == "C1_Inj"] <- "Activated"
clHBCMerged2[clHBCMerged2 == "C2_Hybrid"] <- "Hybrid"
clHBCMerged2[clHBCMerged2 == "C3_UI"] <- "Resting"
archrUmap$clHBCMerged2 <- factor(clHBCMerged2)
archrUmap$treatment <- getCellColData(archProj, "treat")$treat
peaks <- archProj@peakSet
names(peaks) <- paste0("peak",1:length(peaks))
atacPeakMat <- readRDS("../data/scATAC/peakMatArchrHbc_v2.rds")
rownames(atacPeakMat) <- names(peaks)
geneActivity <- readRDS("../data/scATAC/geneMatArchrHbc.rds")
atac <- Seurat::CreateSeuratObject(counts = atacPeakMat,
assay = "ATAC",
project = "10x_ATAC")
atac[["ACTIVITY"]] <- CreateAssayObject(counts = geneActivity)
atac$tech <- "atac"
atac$treatment <- archProj@cellColData$treat
atac$clHBCMerged <- clHBCMerged
# process gene activity
DefaultAssay(atac) <- "ACTIVITY"
atac <- FindVariableFeatures(atac)
atac <- NormalizeData(atac)
atac <- ScaleData(atac)
## Centering and scaling data matrix
# process peak matrix
DefaultAssay(atac) <- "ATAC"
# VariableFeatures(atac) <- names(which(Matrix::rowSums(atac) > 50))
# atac <- RunLSI(atac, n = 30, scale.max = NULL)
# atac <- RunUMAP(atac, reduction = "lsi", dims = 1:30)
atac <- FindTopFeatures(atac, min.cutoff = 'q0')
# Seurat 3
atac <- RunLSI(atac, n = 30)
## Warning: RunLSI is being moved to Signac. Equivalent functionality can be
## achieved via the Signac::RunTFIDF and Signac::RunSVD functions; for more
## information on Signac, please see https://github.com/timoast/Signac
## Warning: RunLSI is being moved to Signac. Equivalent functionality can be
## achieved via the Signac::RunTFIDF and Signac::RunSVD functions; for more
## information on Signac, please see https://github.com/timoast/Signac
## Warning: RunLSI is being moved to Signac. Equivalent functionality can be
## achieved via the Signac::RunTFIDF and Signac::RunSVD functions; for more
## information on Signac, please see https://github.com/timoast/Signac
## Performing TF-IDF normalization
## Running SVD on TF-IDF matrix
## Scaling cell embeddings
# Seurat 4
#atac <- RunTFIDF(atac)
#atac <- RunSVD(atac, n = 30, reduction.key = "lsi_", reduction.name = "lsi")
# drop first dim as often correlated with seq depth
atac <- RunUMAP(object = atac, reduction = 'lsi', dims = 2:30)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 18:05:20 UMAP embedding parameters a = 0.9922 b = 1.112
## 18:05:20 Read 4076 rows and found 29 numeric columns
## 18:05:20 Using Annoy for neighbor search, n_neighbors = 30
## 18:05:20 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 18:05:20 Writing NN index file to temp file /var/folders/15/gs2v829x4gxcdv0tlgk_5l4m0000gn/T//RtmpXyXqlR/fileab27747fb166
## 18:05:20 Searching Annoy index using 1 thread, search_k = 3000
## 18:05:22 Annoy recall = 100%
## 18:05:23 Commencing smooth kNN distance calibration using 1 thread
## 18:05:25 Initializing from normalized Laplacian + noise
## 18:05:25 Commencing optimization for 500 epochs, with 152690 positive edges
## 18:05:32 Optimization finished
# scRNA-seq import
rna <- CreateSeuratObject(counts=countHBC)
rna$tech <- '10x'
rna$celltype <- grpHBC
rna <- FindVariableFeatures(rna, selection.method = "vst", nfeatures = 2000)
rna <- ScaleData(rna)
## Centering and scaling data matrix
rna <- RunPCA(rna, npcs=20)
## PC_ 1
## Positive: Slc6a6, Rps27, Junb, Aqp3, Btg2, Fos, Fxyd3, Apoe, Krt5, Jun
## Gclm, Socs3, Ier2, Gpm6a, Ubc, Ctsl, Cyr61, Ifitm3, Laptm4a, Atf3
## Gstm2, Csrp1, Ptn, Pltp, Fosb, Dusp1, Igfbp2, Klf4, Krt14, Gas6
## Negative: Tmsb10, Cystm1, Atf5, Plaur, Adam8, Krt6a, Map1b, Tuba1a, Tubb2b, Cst6
## Ptma, Ecm1, Stmn1, Emp3, Uchl1, Upk3bl, Ceacam1, Clcf1, Tm4sf1, Gprc5a
## Cldn7, Tubb6, Lypd3, Cldn3, Lamc2, Phlda2, Ncam1, Cldn4, Mal, Sult2b1
## PC_ 2
## Positive: Igfbp2, Cald1, Tmsb10, Ogn, Tmsb4x, Igfbp4, Adam8, Krt5, Apoe, Plaur
## Fam101b, Tspan6, Abca4, Sparc, Tm4sf1, Col23a1, Lamc2, Krt6a, Clcf1, Tmem108
## Slc29a1, Krt14, Tubb6, Ngf, Cst6, Oaf, Tubb2b, Ecm1, Hmga2, Car12
## Negative: Prdx6, Adh7, Gsta3, Sftpd, Cyp2f2, Ly6a, Tst, Slc16a11, Ifitm1, Reg3g
## Muc1, Por, Cbr2, Adh1, Lrrc26, Selenbp1, Mgst1, Gpx2, Sult1d1, Gstp1
## Cbr3, Cd14, Cyp4b1, Cxcl17, Gsto1, Serpinb11, Elf3, Ly6c1, Serpina3n, Epas1
## PC_ 3
## Positive: Adh7, Aldh3a1, Selenbp1, Adh1, Ly6a, Oat, Defb1, Fmo2, Cbr3, Gstp1
## Tmem176a, Atp1b1, Tmem176b, Efemp1, Igfbp4, Upk1b, Sparc, Ly6c1, Tgm2, Igfbp2
## Mgp, Kcnj16, Sult1d1, Gmpr, Cdo1, Krt15, Nrtn, Cyp4b1, Muc1, Bsg
## Negative: Cldn4, X1810011O10Rik, Klf4, Sfn, Zfp36, S100a10, Mast4, Atf3, Tagln2, Fosb
## Avpi1, Arc, Nr4a1, Cldn7, Nfat5, Egr1, Klf6, Cdkn1a, Phlda1, Lmna
## Crym, Neat1, Anxa1, Krt18, S100a6, Acsm4, Lypd3, Papss2, Ier2, Krt23
## PC_ 4
## Positive: mt.Co1, mt.Co3, mt.Atp6, Hpgd, Pla2r1, X1500009L16Rik, Acsm4, Rps27rt, Car12, Dapl1
## Myo6, Fth1, Papss2, Lypd2, Insl6, Cyb5r3, Scd2, Gas6, Trp63, Nmb
## Eya1, Piezo2, Limd2, Crym, Sema5a, Serpinb1b, Ftl1, X4631405K08Rik, Fndc1, Scgb1c1
## Negative: Actg1, Anxa3, Ier3, Nfkbia, F3, Tmem176b, Krt7, Sfn, Cyr61, Cldn4
## Krt14, Tacstd2, Tuba1c, Tmem176a, Fos, Bhlhe40, Junb, Tnfrsf12a, Gmpr, Cish
## Fst, Cxcl16, Plaur, Krt17, Ubc, Trib1, Oat, Atf3, Adam8, Dusp5
## PC_ 5
## Positive: Fosb, Egr1, Neat1, Irs2, Pdcd4, Nr4a1, Lmna, Klf9, Cbx3, Dst
## Ccdc3, Mast4, Sdc4, Timp3, Nfia, Kdm6b, Birc5, Maff, Spry2, Il6st
## Stmn1, Hist1h2ap, Nfat5, Sik1, Arid5a, Gm26532, Pbk, Gclc, Tpm1, Fam101b
## Negative: Aldh2, Gstm1, Prdx1, Aqp3, Ces1d, Tst, Gsn, Clu, Ephx1, Cpe
## Ifitm3, Ubc, Krt18, Prdx6, Hspb1, Galm, Fmo6, Hspa5, Ppa1, Krt5
## Krt8, Pltp, Mgst1, Ctsl, Cyp2a5, Alas1, Cryl1, Calml4, Gstm2, Actg1
rna <- RunUMAP(rna, dims = 1:20, min_dist=.1)
## Warning: The following arguments are not used: min_dist
## 18:05:36 UMAP embedding parameters a = 0.9922 b = 1.112
## 18:05:36 Read 3064 rows and found 20 numeric columns
## 18:05:36 Using Annoy for neighbor search, n_neighbors = 30
## 18:05:36 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 18:05:37 Writing NN index file to temp file /var/folders/15/gs2v829x4gxcdv0tlgk_5l4m0000gn/T//RtmpXyXqlR/fileab276b57db4a
## 18:05:37 Searching Annoy index using 1 thread, search_k = 3000
## 18:05:38 Annoy recall = 100%
## 18:05:39 Commencing smooth kNN distance calibration using 1 thread
## 18:05:41 Initializing from normalized Laplacian + noise
## 18:05:41 Commencing optimization for 500 epochs, with 122146 positive edges
## 18:05:46 Optimization finished
p1 <- DimPlot(atac, reduction = "umap", group.by="treatment", label=TRUE) +
NoLegend() +
ggtitle("scATAC-seq")
## Warning: Using `as.character()` on a quosure is deprecated as of rlang 0.3.0.
## Please use `as_label()` or `as_name()` instead.
## This warning is displayed once per session.
p2 <- DimPlot(rna, group.by = "celltype", label = TRUE, repel = TRUE) + NoLegend() + ggtitle("scRNA-seq")
p1 + p2
# Markers for activated/resting HBC in scRNA-seq
FeaturePlot(rna, features = "Krtdap")
FeaturePlot(rna, features = "Krt6a")
FeaturePlot(rna, features = "Krt5")
FeaturePlot(rna, features = "Krt14")
FeaturePlot(rna, features = "Trp63")
# Markers for activated/resting HBC in scATAC-seq
FeaturePlot(atac, features = "Krtdap") + xlim(c(-5,7))
## Warning: Could not find Krtdap in the default search locations, found in
## ACTIVITY assay instead
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Warning: Removed 14 rows containing missing values (geom_point).
FeaturePlot(atac, features = "Krt6a") + xlim(c(-5,7))
## Warning: Could not find Krt6a in the default search locations, found in ACTIVITY
## assay instead
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Warning: Removed 14 rows containing missing values (geom_point).
FeaturePlot(atac, features = "Krt5") + xlim(c(-6,6))
## Warning: Could not find Krt5 in the default search locations, found in ACTIVITY
## assay instead
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Warning: Removed 55 rows containing missing values (geom_point).
FeaturePlot(atac, features = "Krt14") + xlim(c(-6,6))
## Warning: Could not find Krt14 in the default search locations, found in ACTIVITY
## assay instead
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Warning: Removed 55 rows containing missing values (geom_point).
FeaturePlot(atac, features = "Trp63") + xlim(c(-6,6))
## Warning: Could not find Trp63 in the default search locations, found in ACTIVITY
## assay instead
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Warning: Removed 55 rows containing missing values (geom_point).
ctHybridSub <- as.character(rna$celltype)
hlpid <- which(rna@reductions$umap@cell.embeddings[,1] > 2 & rna@reductions$umap@cell.embeddings[,2]>-1)
ctHybridSub[hlpid] <- NA
hlp2id <- which(rna@reductions$umap@cell.embeddings[,1] > 2 & rna@reductions$umap@cell.embeddings[,2]>-1 & rna@reductions$umap@cell.embeddings[,2]<4)
ctHybridSub[hlp2id] <- "hybrid1"
hlp1id <- which(rna@reductions$umap@cell.embeddings[,1] > 2 & rna@reductions$umap@cell.embeddings[,2]>4)
ctHybridSub[hlp1id] <- "hybrid2"
rna$ctHybridSub <- ctHybridSub
DimPlot(rna, group.by = "ctHybridSub", label = FALSE, repel = TRUE) +
ggtitle("scRNA-seq")
table(ctHybridSub)
## ctHybridSub
## activated hybrid1 hybrid2 resting
## 602 130 148 2184
expressionBoxplot <- function(rna, gene){
counts <- as.vector(rna@assays$RNA[gene,])
df <- data.frame(counts=log1p(counts),
celltype=rna$ctHybridSub)
ggplot(df, aes(x=celltype, y=counts)) +
geom_boxplot() +
xlab("Cell state") +
ylab("Log(Expression + 1)") +
theme_bw() +
theme(axis.text.x = element_text(angle=45, vjust=0.5)) +
ggtitle(gene)
}
# Reg3g: respiratory marker
FeaturePlot(rna, features = "Reg3g")
expressionBoxplot(rna, "Reg3g")
plotEmbedding(
ArchRProj = archProj,
colorBy = "GeneScoreMatrix",
name = "Reg3g",
embedding = "UMAP_cor",
quantCut = c(0.01, 0.95),
size=1/3,
plotAs="points")
## Getting ImputeWeights
## No imputeWeights found, returning NULL
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-ab273b3f15f1-Date-2021-12-23_Time-18-05-57.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = GeneScoreMatrix
## Getting Matrix Values...
## 2021-12-23 18:05:57 :
## 1 2 3 4 5 6
## Plotting Embedding
## 1
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-ab273b3f15f1-Date-2021-12-23_Time-18-05-57.log
# FOXJ1: respiratory ciliated marker
FeaturePlot(rna, features = "Foxj1")
expressionBoxplot(rna, "Foxj1")
plotEmbedding(
ArchRProj = archProj,
colorBy = "GeneScoreMatrix",
name = "Foxj1",
embedding = "UMAP_cor",
quantCut = c(0.01, 0.95),
size=1/3,
plotAs="points")
## Getting ImputeWeights
## No imputeWeights found, returning NULL
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-ab272ab99f9a-Date-2021-12-23_Time-18-06-00.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = GeneScoreMatrix
## Getting Matrix Values...
## 2021-12-23 18:06:01 :
## 1 2 3 4 5 6
## Plotting Embedding
## 1
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-ab272ab99f9a-Date-2021-12-23_Time-18-06-00.log
# Muc5ac: respiratory secretory marker
FeaturePlot(rna, features = "Muc5ac")
expressionBoxplot(rna, "Muc5ac")
plotEmbedding(
ArchRProj = archProj,
colorBy = "GeneScoreMatrix",
name = "Muc5ac",
embedding = "UMAP_cor",
quantCut = c(0.01, 0.95),
size=1/3,
plotAs="points")
## Getting ImputeWeights
## No imputeWeights found, returning NULL
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-ab279d4d60-Date-2021-12-23_Time-18-06-04.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = GeneScoreMatrix
## Getting Matrix Values...
## 2021-12-23 18:06:04 :
## 1 2 3 4 5 6
## Plotting Embedding
## 1
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-ab279d4d60-Date-2021-12-23_Time-18-06-04.log
# Cbr2: respiratory basal cell marker (Boying's analyses)
FeaturePlot(rna, features = "Cbr2")
expressionBoxplot(rna, "Cbr2")
plotEmbedding(
ArchRProj = archProj,
colorBy = "GeneScoreMatrix",
name = "Cbr2",
embedding = "UMAP_cor",
quantCut = c(0.01, 0.95),
size=1/3,
plotAs="points")
## Getting ImputeWeights
## No imputeWeights found, returning NULL
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-ab275742d9f0-Date-2021-12-23_Time-18-06-07.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = GeneScoreMatrix
## Getting Matrix Values...
## 2021-12-23 18:06:07 :
## 1 2 3 4 5 6
## Plotting Embedding
## 1
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-ab275742d9f0-Date-2021-12-23_Time-18-06-07.log
# Sult1c1: respiratory basal cell and respiratory epithelial marker (Boying's analyses)
FeaturePlot(rna, features = "Sult1c1")
expressionBoxplot(rna, "Sult1c1")
plotEmbedding(
ArchRProj = archProj,
colorBy = "GeneScoreMatrix",
name = "Sult1c1",
embedding = "UMAP_cor",
quantCut = c(0.01, 0.95),
size=1/3,
plotAs="points")
## Getting ImputeWeights
## No imputeWeights found, returning NULL
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-ab2765f24851-Date-2021-12-23_Time-18-06-10.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = GeneScoreMatrix
## Getting Matrix Values...
## 2021-12-23 18:06:10 :
## 1 2 3 4 5 6
## Plotting Embedding
## 1
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-ab2765f24851-Date-2021-12-23_Time-18-06-10.log
hybrid1Markers <- c("Muc5b", "Sult1c1")
FeaturePlot(rna, features = "Muc5b")
expressionBoxplot(rna, "Muc5b")
FeaturePlot(rna, features = "Sult1c1")
expressionBoxplot(rna, "Sult1c1")
Note that Pax7 seems to fuse with a FOX TF.. https://www.genecards.org/cgi-bin/carddisp.pl?gene=PAX7
hybrid2Markers <- c("Pax3", "Pax7", "Pax9", "Dmrt2", "2610016A17Rik")
FeaturePlot(rna, features = "Pax3")
expressionBoxplot(rna, "Pax3")
FeaturePlot(rna, features = "Pax7")
expressionBoxplot(rna, "Pax7")
FeaturePlot(rna, features = "Pax9")
expressionBoxplot(rna, "Pax9")
FeaturePlot(rna, features = "Dmrt2")
expressionBoxplot(rna, "Dmrt2")
expressionBoxplot2 <- function(rnaWOE, gene){
if(!gene %in% rownames(rnaWOE@assays$RNA)){
return(NULL)
}
counts <- as.vector(rnaWOE@assays$RNA[gene,])
ct <- rnaWOE$celltype
ct[ct == "Activated HBC"] <- "HBC*"
ct[ct == "Respiratory basal cell"] <- "Resp BC"
ct[ct == "Resting HBC"] <- "HBC"
df <- data.frame(counts=log1p(counts),
celltype=ct)
ggplot(df, aes(x=celltype, y=counts)) +
geom_boxplot() +
xlab("Cell state") +
ylab("Log(Expression + 1)") +
theme_bw() +
theme(axis.text.x = element_text(angle=45, vjust=0.5)) +
ggtitle(gene)
}
hybrid1Top10Markers <- c("Epas1", "Ptgfr", "Kcnj1", "Tmc5", "Pla2g2c", "Muc5b",
"B3galt1", "Tacr1", "Sult1c1", "Arhgef38")
hybrid2Top10Markers <- c("Gm32865", "2610016A17Rik", "Pax9", "Pax3", "Atp10a",
"Kcnt2", "Aldh1a7", "Aldh1a1", "Cfh", "Cap2", "Pcare")
# scRNA-seq import of whole OE data
# Import whole OE data
sceWOE <- readRDS("../data/wholeOE/sceWOE.rds")
# scRNA-seq import of lineage-traced data
rnaWOE <- CreateSeuratObject(counts=as.matrix(assays(sceWOE)$counts))
rnaWOE$tech <- '10x'
rnaWOE$celltype <- colData(sceWOE)$seurat_annotated
rnaWOE <- FindVariableFeatures(rnaWOE, selection.method = "vst", nfeatures = 2000)
rnaWOE <- ScaleData(rnaWOE)
## Centering and scaling data matrix
rnaWOE <- RunPCA(rnaWOE, npcs=20)
## PC_ 1
## Positive: Hpgd, Aox2, Abca4, Nrcam, Car12, Azgp1, Abi3bp, Kcnj13, Gpr155, Apoc1
## Dcn, Gpha2, Ccdc153, 1500009L16Rik, Timp3, Sostdc1, Insl6, Itpkb, Irf1, 1200007C13Rik
## Cldn11, Hsd17b6, Defb1, Acsm4, Adh1, H2-Eb1, Cd36, Hist2h4, Fmo3, Gal3st2c
## Negative: Ran, Hsp90ab1, Ranbp1, Eif5a, Hnrnpf, Npm1, Pfn1, Serbp1, Set, Nme1
## Eif4a1, Ybx1, Anp32b, Rps2, Nhp2, Hspd1, Cct2, Ncl, Snrpd1, Ube2m
## Cct8, Prmt1, Hspe1, Hnrnpab, Ppp1r14b, Tubb5, Cct3, Rpsa, Cct5, Pa2g4
## PC_ 2
## Positive: Igfbp2, Nrcam, Basp1, Tinagl1, Car12, Hpgd, Krt5, Krt14, Abca4, Axl
## G0s2, Ogn, Slc1a3, Thbs1, Ass1, Cald1, Fgfbp1, Col18a1, Krt17, Nrg1
## Igfbp4, Fscn1, Itga3, Azgp1, Bin1, 1500009L16Rik, Palld, Pxdc1, Ddah1, Tubb6
## Negative: Tspan1, Serpinb11, Bpifa1, Muc4, Reg3g, Fetub, Pon1, Tst, Gabrp, Tspan13
## Lypd2, Slc16a11, Lrrc26, Tmc5, Slc31a1, Cmtm8, Clic6, Slc44a4, Cyp2j6, Mlph
## Cxcl17, Xbp1, Sec11c, Myo5c, Fam174b, 5330417C22Rik, Muc16, Vtcn1, Pdzk1ip1, Tff2
## PC_ 3
## Positive: Stmn1, Spc24, Tk1, Cenph, Pbk, Clspn, Smc2, Cenpw, Cenpm, Asf1b
## Smc4, Ndc80, Ccdc34, Pclaf, Atad2, Hmgb2, Tacc3, Cenpk, Tyms, Cdk1
## Anln, Birc5, Rad51, Rrm2, H2afz, Aurkb, Rrm1, Melk, Rad51ap1, Bub1
## Negative: Cldn4, Pttg1ip, Plaur, Adam8, Lamc2, Pls3, Fst, Rbms1, St14, Ngf
## Cdh1, Capn2, Ifi202b, Plin2, Irf6, Zfp593, Nabp1, Palld, Rtn4, Tnfrsf12a
## Msn, Sfn, Rnf19b, Hbegf, Clcf1, Tubb6, Fosl1, Prrg4, Taldo1, Glrx
## PC_ 4
## Positive: Tgm2, Adh7, 4833423E24Rik, Gpx2, Abi3bp, Gsto1, Ly6a, Adh1, Ly6d, Anxa8
## Serpinb5, Mgp, Defb1, Cd14, Cfh, Fmo3, Pax9, Pir, Gm20186, Cdc14a
## Cdh13, Slc39a4, Col25a1, Capg, Pcolce, Sat1, Gdpd2, Rbp1, Irx3, Gda
## Negative: Nrcam, Basp1, Aox2, Car12, Hpgd, Azgp1, Igfbp2, Ermn, 1500009L16Rik, Scgb1c1
## Mdk, Abca4, G0s2, Insl6, Nt5dc2, Calml4, Galm, Pdlim3, Tmem108, Ogn
## Plscr2, Nectin3, Hsd17b6, Kcnj13, Sostdc1, Gal3st2c, Pcdh17, Crabp2, Slc1a3, Mef2c
## PC_ 5
## Positive: Anxa1, Lmo7, Nectin2, Cldn3, Ctsh, St14, Tubb2b, Cldn7, Rab11fip1, Ceacam1
## Crabp2, Cryab, Espn, Cldn6, Mal, Cdh1, Cxcl16, Blnk, Elf5, Cldn9
## Hebp2, Cldn4, Crb3, Tjp2, Gm14137, Akap12, Ndrg1, Elf3, Tacstd2, Metrnl
## Negative: Htra1, Nrg1, Ung, Slc16a1, Bpifa1, Glyat, Srm, Ano1, Mcm7, Sssca1
## Npm3, Tff2, Dctpp1, Pold2, Psmc3ip, Cotl1, Lypd2, Bpifb1, Aldh1a1, Pdzrn4
## Tipin, Axl, Ccbe1, Cdca7, Tcn2, Pon1, Reg3g, Nes, Sigmar1, Muc16
rnaWOE <- RunUMAP(rnaWOE, dims = 1:20, min_dist=.1)
## Warning: The following arguments are not used: min_dist
## 18:06:28 UMAP embedding parameters a = 0.9922 b = 1.112
## 18:06:28 Read 2954 rows and found 20 numeric columns
## 18:06:28 Using Annoy for neighbor search, n_neighbors = 30
## 18:06:28 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 18:06:28 Writing NN index file to temp file /var/folders/15/gs2v829x4gxcdv0tlgk_5l4m0000gn/T//RtmpXyXqlR/fileab2788bef75
## 18:06:28 Searching Annoy index using 1 thread, search_k = 3000
## 18:06:29 Annoy recall = 100%
## 18:06:30 Commencing smooth kNN distance calibration using 1 thread
## 18:06:32 Initializing from normalized Laplacian + noise
## 18:06:32 Commencing optimization for 500 epochs, with 123922 positive edges
## 18:06:38 Optimization finished
pp1 <- pp2 <- list()
for(gg in 1:9){
pp1[[gg]] <- expressionBoxplot2(rnaWOE, gene=hybrid1Top10Markers[gg])
pp2[[gg]] <- expressionBoxplot2(rnaWOE, gene=hybrid2Top10Markers[gg])
}
cowplot::plot_grid(plotlist=pp1)
cowplot::plot_grid(plotlist=pp2)
# Would the subset of respiratory basal cells that express Krt5, Muc5b and Sult1c1 actually be hybrid cells? It looks like it based on markers...
p1 <- FeaturePlot(rnaWOE, hybrid1Top10Markers[1:9], combine = FALSE)
p1Norm <- FeaturePlot(NormalizeData(rnaWOE), hybrid1Top10Markers[1:9], combine = FALSE)
for(i in 1:length(p1)) {
p1[[i]] <- p1[[i]] + NoLegend()
p1Norm[[i]] <- p1Norm[[i]] + NoLegend()
}
cowplot::plot_grid(plotlist = p1)
cowplot::plot_grid(plotlist = p1Norm)
p2 <- FeaturePlot(rnaWOE, hybrid2Top10Markers[1:9], combine = FALSE)
## Warning in FetchData(object = object, vars = c(dims, "ident", features), : The
## following requested variables were not found: Gm32865
p2Norm <- FeaturePlot(NormalizeData(rnaWOE), hybrid2Top10Markers[1:9], combine = FALSE)
## Warning in FetchData(object = object, vars = c(dims, "ident", features), : The
## following requested variables were not found: Gm32865
for(i in 1:length(p2)) {
p2[[i]] <- p1[[i]] + NoLegend()
p2Norm[[i]] <- p1Norm[[i]] + NoLegend()
}
cowplot::plot_grid(plotlist = p2)
cowplot::plot_grid(plotlist = p2Norm)